################
#PSRM: Explaining Support for Redistribution: Social Insurance Systems and Fairness
#
#Observational Data: Eurobarometer
#Part I: Recode
#
#Verena Fetscher
#July 2022
####################

# Start from an empty workspace so no stale objects leak into the recode.
# NOTE(review): rm() only removes objects; attached packages and options()
# survive it, so running the script in a fresh R session is the safer route.
rm(list = ls())

# Packages this script relies on. They were never attached here, so the
# script stopped at the first read.dta13() / %>% call in a fresh session.
library(readstata13) # read.dta13()
library(dplyr)       # %>% and filter()


####################
# Load data files
####################

# convert.factors = FALSE keeps the numeric codes of labelled variables;
# every recode below compares against those numeric codes.
EB <- read.dta13("ZA6939_v2-0-0.dta", convert.factors = FALSE)


####################
# Variable Selection
####################

# Variables:
# Variable country
# Variable qa1d_2: STATEMENTS: GOVERNMENT SHLD TAKE MEASURES TO REDUCE INCOME DIFFS
# Variable qa1b_3: STATEMENTS: I HAVE EQ OPPORTUNITIES IN CNTRY
# Variable qa1e_1: STATEMENTS: IMMIGRATION INTO CNTRY IS A GOOD THING
# Variable qa2_1: IMPORTANT FOR GETTING AHEAD IN LIFE - COMING FROM WEALTHY FAMILY
# Variable qa2_2: IMPORTANT FOR GETTING AHEAD IN LIFE - GOOD EDUCATION
# Variable qa2_3: IMPORTANT FOR GETTING AHEAD IN LIFE - WORKING HARD
# Variable qa2_6: IMPORTANT FOR GETTING AHEAD IN LIFE - BEING LUCKY
# Variable qa2_7: IMPORTANT FOR GETTING AHEAD IN LIFE - SPECIFIC ETHNIC ORIGIN
# Variable qa2_8: IMPORTANT FOR GETTING AHEAD IN LIFE - MAN OR WOMAN
# Variable qa4_1: NEIGHBOURHOOD - RICH/POOR
# Variable qa9a: HIGHEST LEVEL OF EDUCATION - RESPONDENT
# Variable qa9b: HIGHEST LEVEL OF EDUCATION - FATHER
# Variable qa9c: HIGHEST LEVEL OF EDUCATION - MOTHER
# Variable qa9d: HIGHEST LEVEL OF EDUCATION - PATERNAL GRANDFATHER
# Variable qa9e: HIGHEST LEVEL OF EDUCATION - MATERNAL GRANDFATHER
# Variable qa9t.1: EDUCATION HIGHER THAN FATHER
# Variable qa9t.2: EDUCATION EQUAL TO FATHER
# Variable qa9t.7: EDUCATION FATHER HIGHER THAN PATERNAL GRANDFATHER
# Variable qa9t.8: EDUCATION FATHER EQUAL TO PATERNAL GRANDFATHER
# Variable d10: GENDER
# Variable d11r1: AGE RECODED - 4 CATEGORIES
# Variable d11r2: AGE RECODED - 6 CATEGORIES
# Variable d11r3: AGE RECODED - 7 CATEGORIES
# Variable d1: LEFT-RIGHT PLACEMENT
# Variable qa11r: ANNUAL HH INCOME CATEGORY (REC)


####################
# Country selection and weighting
####################
# Keep country codes 1-14, 16-30 and 32 (30 codes in total). Rows with any
# other code, or with a missing code, are dropped.
EB <- EB %>%
  filter(country %in% c(1:14, 16:30, 32))

# Number of respondents per ISO country code after the selection.
table(EB$isocntry)

# Country names with Germany and UK separated.
# Lookup table keyed by the numeric `country` code; East/West Germany and
# Great Britain/Northern Ireland keep their own labels here.
cntry_sep_labels <- c(
  "1"  = "France",
  "2"  = "Belgium",
  "3"  = "Netherlands",
  "4"  = "Germany - West",
  "5"  = "Italy",
  "6"  = "Luxembourg",
  "7"  = "Denmark",
  "8"  = "Ireland",
  "9"  = "Great Britain",
  "10" = "Northern Ireland",
  "11" = "Greece",
  "12" = "Spain",
  "13" = "Portugal",
  "14" = "Germany - East",
  "16" = "Finland",
  "17" = "Sweden",
  "18" = "Austria",
  "19" = "Cyprus",
  "20" = "Czech Republic",
  "21" = "Estonia",
  "22" = "Hungary",
  "23" = "Latvia",
  "24" = "Lithuania",
  "25" = "Malta",
  "26" = "Poland",
  "27" = "Slovakia",
  "28" = "Slovenia",
  "29" = "Bulgaria",
  "30" = "Romania",
  "32" = "Croatia"
)

# Look every row's code up by name; a code without an entry yields NA.
EB$cntry_sep <- unname(cntry_sep_labels[as.character(EB$country)])
rm(cntry_sep_labels)

# Adjust w1: WEIGHT RESULT FROM TARGET (REDRESSMENT)

# Weight result from target - cntr_de
# `de` flags the two German samples (codes 4 and 14) with 1 and is NA
# elsewhere. Their w1 weight is zeroed and w3 (WEIGHT GERMANY) is added.
EB$w3a <- EB$w1
EB$de[EB$country %in% c(4, 14)] <- 1
EB$w3a <- replace(EB$w3a, which(EB$de == 1), 0) + EB$w3

# Weight result from target - cntr_gb
# Same construction for Great Britain / Northern Ireland (codes 9 and 10),
# using w4 (WEIGHT UNITED KINGDOM).
EB$w4a <- EB$w1
EB$gb[EB$country %in% c(9, 10)] <- 1
EB$w4a <- replace(EB$w4a, which(EB$gb == 1), 0) + EB$w4

# Weight result from target - Country
# Starts from the Germany-adjusted weight, zeroes the UK rows and adds w4,
# so both adjustments end up in one column.
EB$w3a4a <- replace(EB$w3a, which(EB$gb == 1), 0) + EB$w4

#EB %>%
 # select(country,cntry_sep,de,w3a,w3,w4,w1,isocntry,w3a4a) -> check

# Country names with Germany and UK unified
EB$cntry <- EB$cntry_sep
EB$cntry[EB$cntry %in% c("Germany - East", "Germany - West")] <- "Germany"
EB$cntry[EB$cntry %in% c("Great Britain", "Northern Ireland")] <- "United Kingdom"

# Same merge applied to the ISO country codes
EB$cntry_short <- EB$isocntry
EB$cntry_short[EB$cntry_short %in% c("DE-E", "DE-W")] <- "DE"
EB$cntry_short[EB$cntry_short %in% c("GB-GBN", "GB-NIR")] <- "GB"

####################
# Socio-demographics
####################
# Gender dummy: 1 = female (d10 == 2); every other value, including a
# missing d10, is coded 0.
EB$gender <- as.numeric(EB$d10 %in% 2)

# Age and age squared
EB$age <- EB$d11
EB$age_2 <- EB$d11 * EB$d11

# Age recoded: copies of the pre-built age-category variables
# (d11r1: 4 categories, d11r2: 6 categories, d11r3: 7 categories)
EB[c("age_cohort55", "age_cohort65", "age_cohort75")] <-
  EB[c("d11r1", "d11r2", "d11r3")]


# Generation: descriptive aliases for the gen1-gen6 indicator columns
gen_aliases <- c(
  gen85      = "gen1",
  gen75      = "gen2",
  pre_war    = "gen3",
  baby_boom  = "gen4",
  gener_X    = "gen5",
  millenials = "gen6"
)
for (alias in names(gen_aliases)) {
  EB[[alias]] <- EB[[gen_aliases[[alias]]]]
}

# Generate generations (following Pew Research Center categories).
# Labels are applied in the order listed, so if a row has several indicators
# set to 1 the last one listed wins. gen3 is not used here.
gen_labels <- c(
  gen1 = "pre-1927",
  gen2 = "1928-45",   # silent
  gen4 = "1946-64",   # boomers
  gen5 = "1965-80",   # generation x
  gen6 = "1981-post"  # millennials and generation z
)
EB$generation <- NA
for (flag in names(gen_labels)) {
  EB$generation[which(EB[[flag]] == 1)] <- gen_labels[[flag]]
}
rm(gen_aliases, gen_labels, alias, flag)

# Visual check of the indicators against the label (prints when the script
# is run at top level).
EB[, c("cntry", "gen1", "gen2", "gen4", "gen5", "gen6", "generation"), drop = FALSE]

# Students: 1 if d15a == 2 and younger than 25, 0 otherwise (rows where the
# condition cannot be evaluated because of missing values stay 0).
EB$student25 <- 0
EB$student25 <- replace(EB$student25, which(EB$d15a == 2 & EB$age < 25), 1)

# Annual HH income category (rec): codes 6, 7 and 9 are set to missing
EB$income <- replace(EB$qa11r, EB$qa11r %in% c(6, 7, 9), NA)

# Income groups (factor):
#   income 1-2 -> 1, income 3 -> 2, income 4-5 -> 3, missing income -> NA.
# The three conditions are disjoint, so the result no longer depends on one
# assignment overwriting another.
# NOTE(review): category 3 forms its own middle group; confirm that
# 1-2 / 3 / 4-5 is the intended split.
EB$incgroup <- NA_real_
EB$incgroup[EB$income %in% c(1, 2)] <- 1
EB$incgroup[EB$income %in% 3] <- 2
EB$incgroup[EB$income %in% c(4, 5)] <- 3

EB$incgroup <- as.factor(EB$incgroup)

# Type of community: keep codes 1 (rural area), 2 (small town) and
# 3 (large town); anything else becomes NA.
EB$urban <- c(1, 2, 3)[match(EB$d25, c(1, 2, 3))]

# Left-right placement: codes 97 and 98 are set to missing
EB$leftright <- replace(EB$d1, EB$d1 %in% c(97, 98), NA)

# Neighborhood rich/poor: keep codes 1 (very poor) to 5 (very rich);
# anything else becomes NA.
EB$neighborhood <- c(1, 2, 3, 4, 5)[match(EB$qa4_1, c(1, 2, 3, 4, 5))]



####################
# Fairness and Redistribution
####################

# All items in this section are 5-point scales that get reversed
# (5 -> 1, 4 -> 2, 3 -> 3, 2 -> 4, 1 -> 5); any other code becomes NA.
# NOTE(review): presumably the reversal makes higher values mean stronger
# agreement / greater importance - confirm against the codebook.
reversed_items <- c(
  # Redistribution: government should take measures to reduce income diffs
  redistr         = "qa1d_2",
  # STATEMENTS: I HAVE EQ OPPORTUNITIES IN CNTRY (Nowadays in (OUR COUNTRY)
  # I have equal opportunities for getting ahead in life, like everyone else)
  eqOpp           = "qa1b_3",
  # Important for getting ahead in life - coming from wealthy family
  imp_wealthFam   = "qa2_1",
  # Important for getting ahead in life - good education
  imp_edu         = "qa2_2",
  # Important for getting ahead in life - working hard
  imp_workhard    = "qa2_3",
  # Important for getting ahead in life - being lucky
  imp_lucky       = "qa2_6",
  # IMPORTANT FOR GETTING AHEAD IN LIFE - KNOWING RIGHT PEOPLE
  # (1 Not important at all - 5 Essential)
  imp_connections = "qa2_4",
  # Important for getting ahead in life - specific ethnic origin
  imp_ethnOrigin  = "qa2_7",
  # Important for getting ahead in life - man or woman
  imp_gender      = "qa2_8",
  # Immigration good thing
  immi            = "qa1e_1"
)

for (new_name in names(reversed_items)) {
  raw <- EB[[reversed_items[[new_name]]]]
  # match() returns the position of the raw code in 1..5 (NA if absent),
  # which then indexes the reversed scale.
  EB[[new_name]] <- c(5, 4, 3, 2, 1)[match(raw, c(1, 2, 3, 4, 5))]
}
rm(reversed_items, new_name, raw)


####################
# Education related variables
####################

# Highest level of education and high-education dummy for the respondent (r),
# father (f), mother (m), paternal grandfather (pgf) and maternal
# grandfather (mgf).
#
# edu_<who>:    source code kept if it is 1-5, otherwise NA
#               (refusal, DK and missing)
#                 5 = completed upper level of education to master,
#                     doctoral degree
#                 4 = completed post secondary vocational studies, or
#                     higher education
#                 3 = completed secondary
#                 2 = completed primary
#                 1 = not completed primary
# edu_he_<who>: 1 for levels 4-5, 0 for levels 1-3, NA otherwise
edu_sources <- c(r = "qa9a", f = "qa9b", m = "qa9c", pgf = "qa9d", mgf = "qa9e")

for (who in names(edu_sources)) {
  level <- c(1, 2, 3, 4, 5)[match(EB[[edu_sources[[who]]]], c(1, 2, 3, 4, 5))]
  EB[[paste0("edu_", who)]] <- level
  EB[[paste0("edu_he_", who)]] <- as.numeric(level >= 4)
}
rm(edu_sources, who, level)

# High education dummy: at least one parent.
# 1 if father or mother is highly educated, 0 if neither known value is 1,
# NA only when BOTH parents' education is missing. pmax(..., na.rm = TRUE)
# takes the row-wise maximum of the two 0/1 dummies and returns NA only if
# every input is NA.
# Fix: the mother's dummy (edu_he_m) is now part of both the "at least one"
# test and the missingness test; before, only the father was consulted.
EB$edu_he_p <- pmax(EB$edu_he_f, EB$edu_he_m, na.rm = TRUE)

# High education dummy: at least one grandparent (paternal or maternal
# grandfather), built the same way.
# Fix: NA is now assigned only when both grandfathers are missing; before, a
# missing maternal grandfather alone set the dummy to NA even if the paternal
# grandfather was known.
EB$edu_he_gp <- pmax(EB$edu_he_mgf, EB$edu_he_pgf, na.rm = TRUE)

# Identify respondents who fail to indicate the educational level of either
# themselves, parents or grandparents.
# edu = 1 only if the respondent's own level is known AND at least one
# grandfather's level is known AND at least one parent's level is known;
# otherwise 0.
EB$edu <- as.numeric(
  !is.na(EB$edu_r) &
    !(is.na(EB$edu_pgf) & is.na(EB$edu_mgf)) &
    !(is.na(EB$edu_f) & is.na(EB$edu_m))
)

####################
# Occupation
####################

# Occupation category (codes 1-18), built from d15a and d15b.
# NOTE(review): presumably d15a is the current and d15b the last occupation -
# confirm against the codebook.
# The column starts as a copy of d15a and is then overwritten line by line.
# All conditions test the untouched d15a/d15b columns, never occu itself, so
# earlier recodes cannot cascade. The ORDER of the lines still matters: a row
# that satisfies several conditions (e.g. one via d15a, another via d15b)
# keeps the code from the last matching line.
EB$occu	<- EB$d15a
EB$occu[EB$d15a == 3 | EB$d15b == 15] <- 1 # unemployed, never worked
EB$occu[EB$d15a == 1] <- 2 # housework
EB$occu[EB$d15a == 2] <- 3 # student
EB$occu[EB$d15a == 4] <- 4 # retired
EB$occu[EB$d15a == 5 | EB$d15b == 1] <- 5 # farmer
EB$occu[EB$d15a == 6 | EB$d15b == 2] <- 6 # fisherman
EB$occu[EB$d15a == 18 | EB$d15b == 14] <- 7 # unskilled
EB$occu[EB$d15a == 17 | EB$d15b == 13] <- 8 # skill
EB$occu[EB$d15a == 16 | EB$d15b == 12] <- 9 # supervisor
EB$occu[EB$d15a == 15 | EB$d15b == 11] <- 10 # employed at desk -- NOTE(review): occu_rank groups this code under "employee, not at desk"; confirm label
EB$occu[EB$d15a == 14 | EB$d15b == 10] <- 11 # traveling
EB$occu[EB$d15a == 13 | EB$d15b == 9] <- 12 # desk
EB$occu[EB$d15a == 8 | EB$d15b == 4] <- 13 # shop
EB$occu[EB$d15a == 12 | EB$d15b == 8] <- 14 # middle management
EB$occu[EB$d15a == 10 | EB$d15b == 6] <- 15 # employed prof
EB$occu[EB$d15a == 7 | EB$d15b == 3] <- 16 # professional
EB$occu[EB$d15a == 11 | EB$d15b == 7] <- 17 # general management
EB$occu[EB$d15a == 9 | EB$d15b == 5] <- 18 # business proprietor


# Occupation rank (1-10), a coarser grouping of the same d15a/d15b codes.
# Starts as NA, so rows matching none of the conditions stay missing.
# As with any sequence of overwriting assignments, a row that satisfies
# several conditions keeps the rank from the last matching line, so the line
# order is part of the definition.
EB$occu_rank <- NA
EB$occu_rank[EB$d15a == 3 | EB$d15b == 15] <- 1 # unemployed, never worked
EB$occu_rank[EB$d15a == 1 | EB$d15a == 2] <- 2 # students and housework
EB$occu_rank[EB$d15a == 5 | EB$d15a == 6 | EB$d15a == 18 | EB$d15b == 1 | EB$d15b == 2 | EB$d15b == 14] <- 3 # farmer, fish, other unskilled
EB$occu_rank[EB$d15a == 17 | EB$d15b == 13] <- 4 # skilled manual
EB$occu_rank[EB$d15a == 14 | EB$d15a == 15 | EB$d15b == 10 | EB$d15b == 11] <- 5 # employee, not at desk
EB$occu_rank[EB$d15a == 8 | EB$d15b == 4] <- 6 # shop-owner
EB$occu_rank[EB$d15a == 13 | EB$d15b == 9] <- 7 # employee, at desk
EB$occu_rank[EB$d15a == 12 | EB$d15a == 16 | EB$d15b == 8 | EB$d15b == 12] <- 8 # middle management (includes supervisors: d15a 16 / d15b 12)
EB$occu_rank[EB$d15a == 10 | EB$d15a == 11 | EB$d15a == 7 | EB$d15b == 6 | EB$d15b == 7 | EB$d15b == 3] <- 9 # professionals & management
EB$occu_rank[EB$d15a == 9 | EB$d15b == 5] <- 10 # business proprietor

# Distribution of the occupation ranks
table(EB$occu_rank)

# Unemployment dummy: 1 for occupation rank 1, 0 for any higher rank,
# NA where the rank is missing.
EB$unemp <- ifelse(EB$occu_rank == 1, 1, ifelse(EB$occu_rank > 1, 0, NA))

table(EB$unemp)


# Single household
# d40a: Could you tell me how many people aged 15 years or more live in your
# household, yourself included?
# 1 if exactly one person, 0 for any other answer except code 99; code 99 and
# missing values stay NA.
EB$hhsingle <- ifelse(EB$d40a == 1, 1, ifelse(EB$d40a != 99, 0, NA))


####################
# Select data
####################
# Restrict the sample to these 13 countries (unified country names).
EB <- EB %>%
  filter(cntry %in% c(
    "Austria", "Belgium", "Denmark", "Finland", "France",
    "Germany", "Ireland", "Italy", "Netherlands", "Portugal",
    "Spain", "Sweden", "United Kingdom"
  ))


####################
# Save data
####################
# Writes the recoded data frame, under the object name EB, to the working
# directory.
save(list = "EB", file = "EB_17.Rda")
